library(lfe)
library(jtools)
library(stargazer)
# Read the file generated in Python
# Election analysed by this script; these two values also select the input file.
country <- "Romania"
year <- 2009

# Derive the input path from `country` and `year` instead of repeating them in a
# hard-coded string, so changing the values above re-targets the script. For the
# values above this resolves to
# "data_regressions/Romania_2009_polling_station.csv".
input_path <- file.path(
  "data_regressions",
  paste0(country, "_", year, "_polling_station.csv")
)

# NOTE(review): read.csv()'s `encoding` argument only marks strings as "latin1"
# or "UTF-8" and never re-encodes the input, so "iso8859_16" (a Python codec
# name) has no effect here. If the file really is ISO-8859-16, the argument that
# converts it is `fileEncoding = "ISO-8859-16"` -- confirm against the Python
# export step before changing it.
df <- read.csv(input_path, encoding="iso8859_16")

# Keep only the rows whose candidate name contains "Traian" (Traian Băsescu);
# the ASCII-only pattern avoids having to match the diacritic.
df <- df[grepl("Traian", df$candidate), ]
df

# Three OLS specifications of `rate`:
#   (1) the rate_* regressors only,
#   (2) the divisiveness_* regressors only,
#   (3) both sets together.
model_1 <- lm(
  rate ~ rate_tudor + rate_antonescu + rate_kelemen + rate_oprescu +
    rate_basescu,
  data = df
)
model_2 <- lm(
  rate ~ divisiveness_tudor + divisiveness_antonescu + divisiveness_kelemen +
    divisiveness_oprescu + divisiveness_basescu,
  data = df
)
model_3 <- lm(
  rate ~ rate_tudor + rate_antonescu + rate_kelemen + rate_oprescu +
    rate_basescu + divisiveness_tudor + divisiveness_antonescu +
    divisiveness_kelemen + divisiveness_oprescu + divisiveness_basescu,
  data = df
)

# Side-by-side comparison table printed as plain text.
stargazer(model_1, model_2, model_3, type = "text")
============================================================================================================
Dependent variable:
-------------------------------------------------------------------------------------
rate
(1) (2) (3)
------------------------------------------------------------------------------------------------------------
rate_tudor 0.536*** 0.556***
(0.018) (0.019)
rate_antonescu 0.266*** 0.277***
(0.006) (0.006)
rate_kelemen 0.427*** 0.398***
(0.004) (0.005)
rate_oprescu 0.432*** 0.384***
(0.025) (0.027)
rate_basescu 1.047*** 1.034***
(0.005) (0.005)
divisiveness_tudor -2.391*** -0.566***
(0.184) (0.089)
divisiveness_antonescu -1.091*** -0.329***
(0.070) (0.033)
divisiveness_kelemen 0.157*** 0.078***
(0.016) (0.009)
divisiveness_oprescu -0.251 1.315***
(0.240) (0.113)
divisiveness_basescu 1.289*** 0.162***
(0.058) (0.029)
Constant 0.030*** 0.548*** 0.036***
(0.003) (0.008) (0.005)
------------------------------------------------------------------------------------------------------------
Observations 11,107 11,107 11,107
R2 0.807 0.091 0.812
Adjusted R2 0.807 0.091 0.812
Residual Std. Error 0.053 (df = 11101) 0.115 (df = 11101) 0.052 (df = 11096)
F Statistic 9,305.346*** (df = 5; 11101) 223.174*** (df = 5; 11101) 4,805.735*** (df = 10; 11096)
============================================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
library(minpack.lm)

# Re-fit the three specifications with nlsLM() so that box constraints can be
# placed on the parameters. Note that model_1..model_3 overwrite the lm() fits
# of the same names.

# Parameters constrained to [0, 1] (intercept and rate_* coefficients) and
# parameters left unconstrained (divisiveness_* coefficients).
box_pars <- c(
  "const", "b_tudor", "b_antonescu", "b_kelemen", "b_oprescu", "b_basescu"
)
free_pars <- c("d_tudor", "d_antonescu", "d_kelemen", "d_oprescu", "d_basescu")

# Every parameter starts at 0; returns a named list in the order of `pars`.
zero_start <- function(pars) {
  as.list(setNames(numeric(length(pars)), pars))
}

# (1) rate_* regressors only, all parameters bounded to [0, 1].
model_1 <- nlsLM(
  formula = rate ~ const +
    b_tudor*rate_tudor +
    b_antonescu*rate_antonescu +
    b_kelemen*rate_kelemen +
    b_oprescu*rate_oprescu +
    b_basescu*rate_basescu,
  data = df,
  start = zero_start(box_pars),
  lower = setNames(rep(0, length(box_pars)), box_pars),
  upper = setNames(rep(1, length(box_pars)), box_pars)
)

# (2) divisiveness_* regressors only, fitted without any bounds.
model_2 <- nlsLM(
  formula = rate ~ const +
    d_tudor*divisiveness_tudor +
    d_antonescu*divisiveness_antonescu +
    d_kelemen*divisiveness_kelemen +
    d_oprescu*divisiveness_oprescu +
    d_basescu*divisiveness_basescu,
  data = df,
  start = zero_start(c("const", free_pars))
)

# (3) both regressor sets: const and b_* bounded to [0, 1], d_* unbounded.
model_3 <- nlsLM(
  formula = rate ~ const +
    b_tudor*rate_tudor +
    b_antonescu*rate_antonescu +
    b_kelemen*rate_kelemen +
    b_oprescu*rate_oprescu +
    b_basescu*rate_basescu +
    d_tudor*divisiveness_tudor +
    d_antonescu*divisiveness_antonescu +
    d_kelemen*divisiveness_kelemen +
    d_oprescu*divisiveness_oprescu +
    d_basescu*divisiveness_basescu,
  data = df,
  start = zero_start(c(box_pars, free_pars)),
  lower = c(
    setNames(rep(0, length(box_pars)), box_pars),
    setNames(rep(-Inf, length(free_pars)), free_pars)
  ),
  upper = c(
    setNames(rep(1, length(box_pars)), box_pars),
    setNames(rep(Inf, length(free_pars)), free_pars)
  )
)

# The `algorithm = "port"` alternative was left disabled for all three fits.
models <- list(model_1, model_2, model_3)
nlstargazer(models = models)
| Parameters | Model.1 | Model.2 | Model.3 |
|---|---|---|---|
| b_antonescu | 0.2468*** | 0.2629*** | |
| b_basescu | 1.0000*** | 1.0000*** | |
| b_kelemen | 0.4064*** | 0.3795*** | |
| b_oprescu | 0.4248*** | 0.3959*** | |
| b_tudor | 0.4866*** | 0.5243*** | |
| const | 0.0540*** | 0.5478*** | 0.0540*** |
| d_antonescu | -1.0911*** | -0.3548*** | |
| d_basescu | 1.2893*** | 0.1984*** | |
| d_kelemen | 0.1566*** | 0.0856*** | |
| d_oprescu | -0.2507 | 1.2528*** | |
| d_tudor | -2.3910*** | -0.6061*** | |
| Residual sum-of-squares | 31.33 | 146.62 | 30.39 |
library(minpack.lm)

# Experimental variant of the combined specification: each divisiveness_* term
# is multiplied by the matching rate_* term, so the d_* parameters scale the
# product divisiveness * rate rather than divisiveness alone.
test_box_pars <- c(
  "const", "b_tudor", "b_antonescu", "b_kelemen", "b_oprescu", "b_basescu"
)
test_free_pars <- c(
  "d_tudor", "d_antonescu", "d_kelemen", "d_oprescu", "d_basescu"
)
test_pars <- c(test_box_pars, test_free_pars)
# Number of bounded / unbounded parameters, in the order used by `test_pars`.
test_counts <- c(length(test_box_pars), length(test_free_pars))

model_test <- nlsLM(
  formula = rate ~ const +
    b_tudor*rate_tudor +
    b_antonescu*rate_antonescu +
    b_kelemen*rate_kelemen +
    b_oprescu*rate_oprescu +
    b_basescu*rate_basescu +
    d_tudor*divisiveness_tudor*rate_tudor +
    d_antonescu*divisiveness_antonescu*rate_antonescu +
    d_kelemen*divisiveness_kelemen*rate_kelemen +
    d_oprescu*divisiveness_oprescu*rate_oprescu +
    d_basescu*divisiveness_basescu*rate_basescu,
  data = df,
  # All parameters start at 0.
  start = as.list(setNames(rep(0, length(test_pars)), test_pars)),
  # const and b_* are bounded to [0, 1]; d_* are unbounded.
  lower = setNames(rep(c(0, -Inf), times = test_counts), test_pars),
  upper = setNames(rep(c(1, Inf), times = test_counts), test_pars)
)
model_test
Nonlinear regression model
model: rate ~ const + b_tudor * rate_tudor + b_antonescu * rate_antonescu + b_kelemen * rate_kelemen + b_oprescu * rate_oprescu + b_basescu * rate_basescu + d_tudor * divisiveness_tudor * rate_tudor + d_antonescu * divisiveness_antonescu * rate_antonescu + d_kelemen * divisiveness_kelemen * rate_kelemen + d_oprescu * divisiveness_oprescu * rate_oprescu + d_basescu * divisiveness_basescu * rate_basescu
data: df
const b_tudor b_antonescu b_kelemen b_oprescu b_basescu d_tudor d_antonescu d_kelemen d_oprescu d_basescu
0.0312 0.7074 0.3590 0.5810 0.1078 0.9611 -4.6272 -0.8208 -0.6441 16.0734 0.6302
residual sum-of-squares: 30.47
Number of iterations to convergence: 3
Achieved convergence tolerance: 1.49e-08
# Aggregate the first-round results to one row per candidate and compute each
# candidate's share of the total.
# NOTE(review): fread() and the `[ , .(...), by = ]` syntax belong to data.table,
# which none of the library() calls visible in this file attach -- confirm it is
# loaded elsewhere.
data1r <- fread("data_output/Romania/2009_first_round.csv.gz", encoding="UTF-8")
data_1r <- data1r[ , .(value = sum(value)), by = candidate]
data_1r$rate <- data_1r$value / sum(data_1r$value)

# Transliterate the names that are later referenced as ASCII literals
# ("Traian Basescu", "Mircea Geoana"). Previously only Băsescu was normalised,
# so "Mircea Geoană" never compared equal to the "Mircea Geoana" literal passed
# to draw_sankey(). ASCII-only patterns avoid matching on the diacritics.
data_1r[grepl("Traian", data_1r$candidate), "candidate"] <- "Traian Basescu"
data_1r[grepl("Geoan", data_1r$candidate), "candidate"] <- "Mircea Geoana"

# Candidates with more than 2% of the first-round total.
candidates <- c(subset(data_1r, rate > 0.02)[["candidate"]])
candidates
[1] "Traian Basescu" "Crin Antonescu" "Mircea Geoană" "Corneliu Vadim Tudor" "Sorin Oprescu" "Hunor Kelemen"
# Combine the bounded model_1 coefficients with the first-round shares and draw
# the flow diagram.
coefs <- coef(model_1)

# First-round candidate -> name of the model_1 parameter attached to that
# candidate's rate_* regressor.
candidate_params <- c(
  "Traian Basescu" = "b_basescu",
  "Crin Antonescu" = "b_antonescu",
  "Corneliu Vadim Tudor" = "b_tudor",
  "Sorin Oprescu" = "b_oprescu",
  "Hunor Kelemen" = "b_kelemen"
)

df_model <- data.frame(
  candidate = names(candidate_params),
  # `[[` keeps the original behaviour of failing on a missing parameter name.
  coef = vapply(
    candidate_params,
    function(param) coefs[[param]],
    numeric(1),
    USE.NAMES = FALSE
  )
)

# Full outer join on the only shared column, so candidates without a
# coefficient are kept (with NA in `coef`).
df_model <- merge(df_model, data_1r, by = "candidate", all = TRUE)

# Split each first-round share into the part scaled by the coefficient and the
# complementary part.
df_model[["1_coef"]] <- 1 - df_model[["coef"]]
df_model[["to_candidate_a"]] <- df_model[["rate"]] * df_model[["coef"]]
df_model[["to_candidate_b"]] <- df_model[["rate"]] * df_model[["1_coef"]]
df_model

draw_sankey(df_model, candidates, c("Mircea Geoana", "Traian Basescu"))
[1] "Constantin Ninel Potârcă"
[1] "Mircea Geoana"
[1] FALSE
[1] "Constantin Rotaru"
[1] "Mircea Geoana"
[1] FALSE
[1] "Corneliu Vadim Tudor"
[1] "Mircea Geoana"
[1] FALSE
[1] "Crin Antonescu"
[1] "Mircea Geoana"
[1] FALSE
[1] "George Becali"
[1] "Mircea Geoana"
[1] FALSE
[1] "Gheorghe-Eduard Manole"
[1] "Mircea Geoana"
[1] FALSE
[1] "Hunor Kelemen"
[1] "Mircea Geoana"
[1] FALSE
[1] "Mircea Geoană"
[1] "Mircea Geoana"
[1] FALSE
[1] "Ovidiu Cristian Iane"
[1] "Mircea Geoana"
[1] FALSE
[1] "Remus Cernea"
[1] "Mircea Geoana"
[1] FALSE
[1] "Sorin Oprescu"
[1] "Mircea Geoana"
[1] FALSE
[1] "Traian Basescu"
[1] "Mircea Geoana"
[1] FALSE